{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6455b906",
   "metadata": {},
   "source": [
    "## 导入所需的包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "38a3e5db",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T02:37:03.640587Z",
     "start_time": "2021-11-15T02:37:03.243471Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[33mcommit f814661261860e419b5af1b5a6c8ecadb19ea70e\u001b[m\u001b[33m (\u001b[m\u001b[1;36mHEAD -> \u001b[m\u001b[1;32mmaster\u001b[m\u001b[33m, \u001b[m\u001b[1;31morigin/master\u001b[m\u001b[33m, \u001b[m\u001b[1;31morigin/HEAD\u001b[m\u001b[33m)\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 15 10:35:34 2021 +0800\r\n",
      "\r\n",
      "    debug: 时序xgb模型\r\n",
      "\r\n",
      "\u001b[33mcommit 240b35118c573b6810db4b05558c0034d524c294\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Nov 14 19:59:02 2021 +0800\r\n",
      "\r\n",
      "    时序模型: 时间单位(间隔)为week.\r\n",
      "\r\n",
      "\u001b[33mcommit 9a272d89994102b64db956a6777d1f8a7a2f8c61\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Nov 14 19:41:22 2021 +0800\r\n",
      "\r\n",
      "    时序lag和ewm特征优化.\r\n",
      "\r\n",
      "\u001b[33mcommit 73578947ba07a64408dd0e13e9b43b9b49ba44bd\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Nov 14 19:26:57 2021 +0800\r\n",
      "\r\n",
      "    时序数据: 周期为week的数据集特征优化.\r\n",
      "\r\n",
      "\u001b[33mcommit 85c96e3f5053aaa959bfce339779d8c15c27f372\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Nov 12 17:46:09 2021 +0800\r\n",
      "\r\n",
      "    debug: 拼表.\r\n",
      "\r\n",
      "\u001b[33mcommit 7a30d642b74eb164a5189826bef3011a414b96c2\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Nov 12 16:49:11 2021 +0800\r\n",
      "\r\n",
      "    增加Walmart Recruiting结果.\r\n",
      "\r\n",
      "\u001b[33mcommit 7c86e33f43a0547a927b79c3f88ef18b63c4128b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Nov 9 15:12:00 2021 +0800\r\n",
      "\r\n",
      "    优化feature_combination显示.\r\n",
      "\r\n",
      "\u001b[33mcommit 7e73a518962ae916d3122b53d19f7c799bf6aaeb\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 8 19:14:05 2021 +0800\r\n",
      "\r\n",
      "    优化代码格式.\r\n",
      "\r\n",
      "\u001b[33mcommit 01c427e7d02c59c6658320f942bea5d0bfb472cf\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 8 19:08:39 2021 +0800\r\n",
      "\r\n",
      "    get_submit_ts\r\n",
      "\r\n",
      "\u001b[33mcommit d2d50eefbc8557f1721ce30bca26f7a24657e780\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 8 16:31:41 2021 +0800\r\n",
      "\r\n",
      "    增加断言.\r\n",
      "\r\n",
      "\u001b[33mcommit 70bccfc846e8caff59e47337bf7fd17dbab175a0\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 8 16:28:09 2021 +0800\r\n",
      "\r\n",
      "    init 时序模型.\r\n",
      "\r\n",
      "\u001b[33mcommit 00d16c11b896463dad087191d6be40314f4d721a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 8 16:27:13 2021 +0800\r\n",
      "\r\n",
      "    时序模型.\r\n",
      "\r\n",
      "\u001b[33mcommit ad9a91db9c4e9c67acff8f4feba684eaf8837f55\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Nov 5 18:57:51 2021 +0800\r\n",
      "\r\n",
      "    add feature: exp weighted mean.\r\n",
      "\r\n",
      "\u001b[33mcommit 94ee626acfaba1dfbf494100a587bac7db6ba53a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Nov 5 18:49:34 2021 +0800\r\n",
      "\r\n",
      "    add feature: FeatureRollingStatTS.\r\n",
      "\r\n",
      "\u001b[33mcommit 2c1530d74732f6ecb25c2ca5db1884e6dca34376\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Nov 5 17:32:08 2021 +0800\r\n",
      "\r\n",
      "    init fe_shiftts.\r\n",
      "\r\n",
      "\u001b[33mcommit 0446c87c009f4ea084fd13fea113095038c24ff8\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Nov 5 17:31:29 2021 +0800\r\n",
      "\r\n",
      "    时序数据特征: shift_ts.\r\n",
      "\r\n",
      "\u001b[33mcommit 1260175e5c4d6d08867443afbbc9b8de045090f0\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Nov 5 16:48:21 2021 +0800\r\n",
      "\r\n",
      "    开发时序数据集\r\n",
      "\r\n",
      "\u001b[33mcommit f39ad3e4fd22f755b9e9e7cadb953af7cec74a55\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Nov 2 14:27:20 2021 +0800\r\n",
      "\r\n",
      "    readme: 效果表调整顺序.\r\n",
      "\r\n",
      "\u001b[33mcommit fb3147a403ac13725d9fa18e6690cc85f753903c\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Nov 2 14:25:31 2021 +0800\r\n",
      "\r\n",
      "    readme: 效果对比去除index.\r\n",
      "\r\n",
      "\u001b[33mcommit 3489d0e8e7899a82480dba539889a50a5f95a038\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 1 20:27:32 2021 +0800\r\n",
      "\r\n",
      "    add demo: elo.\r\n",
      "\r\n",
      "\u001b[33mcommit d71cf3537b30085e68059d00898660a478cfb5bd\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 1 20:25:09 2021 +0800\r\n",
      "\r\n",
      "    更新elo结果.\r\n",
      "\r\n",
      "\u001b[33mcommit cb44541a782e388eb3dbf58cc091693c74f287db\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 1 17:57:48 2021 +0800\r\n",
      "\r\n",
      "    FE_One2M合并到最终特征.\r\n",
      "\r\n",
      "\u001b[33mcommit 9b55fafc7f90a7b4c783367e433aab0c0ee96fe1\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 1 17:35:57 2021 +0800\r\n",
      "\r\n",
      "    增加多表one2M特征.\r\n",
      "\r\n",
      "\u001b[33mcommit 0683174389154dde211585092f9b6dc46997c1be\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Nov 1 17:01:11 2021 +0800\r\n",
      "\r\n",
      "    add feature: fe_one2M.\r\n",
      "\r\n",
      "\u001b[33mcommit f9fd40e13e5734f960082d384bd286cd8979241c\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sat Oct 30 15:04:17 2021 +0800\r\n",
      "\r\n",
      "    debug log1p.\r\n",
      "\r\n",
      "\u001b[33mcommit cd68272c961136dc1ebf05a1ff3349cb3cf3a0f9\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sat Oct 30 13:22:34 2021 +0800\r\n",
      "\r\n",
      "    label的最小值大于等于0才进行log1p的操作.\r\n",
      "\r\n",
      "\u001b[33mcommit 08793133bdf0cea388110e4417c548644fdb65d7\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 20:14:05 2021 +0800\r\n",
      "\r\n",
      "    debug: string type recognition.\r\n",
      "\r\n",
      "\u001b[33mcommit 1723eb3ac9237b8c048712f220b41c0c0042283b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 17:32:17 2021 +0800\r\n",
      "\r\n",
      "    debug: string type recognition.\r\n",
      "\r\n",
      "\u001b[33mcommit c4ee0ccef973b048021e221c47eebbd19ff2e036\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 13:03:12 2021 +0800\r\n",
      "\r\n",
      "    编辑当前数据集.\r\n",
      "\r\n",
      "\u001b[33mcommit 7f03197d48ac11a85ed4200a2afe68e9ac974479\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 10:59:48 2021 +0800\r\n",
      "\r\n",
      "    test stumbleupon.\r\n",
      "\r\n",
      "\u001b[33mcommit c4db091278835a16fd5f4dd9f1ea2c598d7b7ac9\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 10:53:33 2021 +0800\r\n",
      "\r\n",
      "    优化run.py\r\n",
      "\r\n",
      "\u001b[33mcommit 26e9cc1638eb97e25d6215379a31267f2696b3ab\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 10:36:50 2021 +0800\r\n",
      "\r\n",
      "    增加数据计时.\r\n",
      "\r\n",
      "\u001b[33mcommit 4c28ebfad270121eebd3d82c27a32c57300c965f\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 08:13:30 2021 +0800\r\n",
      "\r\n",
      "    mv run.py to ../\r\n",
      "\r\n",
      "\u001b[33mcommit 073351158e07c75aeea21bb95960e81365fa08ce\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 08:07:02 2021 +0800\r\n",
      "\r\n",
      "    debug submit.py\r\n",
      "\r\n",
      "\u001b[33mcommit feadab8973f6168d2bd5464df93c7debf8764738\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 29 07:37:09 2021 +0800\r\n",
      "\r\n",
      "    xgb模型取消gpu模式:部分机器或集群可能由于没有gpu导致运行失败.\r\n",
      "\r\n",
      "\u001b[33mcommit c3b75de299e12983f96ca8c8b9fbf032e8b7d566\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Oct 28 22:57:53 2021 +0800\r\n",
      "\r\n",
      "    集群执行代码.\r\n",
      "\r\n",
      "\u001b[33mcommit d8cfa0e67af9068e71889101ef000cc4e61f5746\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 25 17:30:25 2021 +0800\r\n",
      "\r\n",
      "    add demo: Restaurant Revenue.\r\n",
      "\r\n",
      "\u001b[33mcommit d2d7e7a4ac047edc27a7e500317329c2138c7d53\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 25 17:12:34 2021 +0800\r\n",
      "\r\n",
      "    debug: txt feature type recognition.\r\n",
      "\r\n",
      "\u001b[33mcommit d57133ffcd0534aeab567d15ac3fa5e251359ace\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 25 15:41:27 2021 +0800\r\n",
      "\r\n",
      "    debug, feature type recognition: txt.\r\n",
      "\r\n",
      "\u001b[33mcommit a5a84e21b675f52a1d384f932239b35968eb4f7d\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 25 14:48:50 2021 +0800\r\n",
      "\r\n",
      "    add demo: benz.\r\n",
      "\r\n",
      "\u001b[33mcommit 8f8a9199ebb66b3af8b8f00ca0aaf1fd8deb5f4b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Oct 20 09:25:38 2021 +0800\r\n",
      "\r\n",
      "    add Allstate autox demo.\r\n",
      "\r\n",
      "\u001b[33mcommit 3c7c236469a71678d8e2aa4a50ddf54f477be76d\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Oct 20 09:24:16 2021 +0800\r\n",
      "\r\n",
      "    update Allstate result.\r\n",
      "\r\n",
      "\u001b[33mcommit ed01e064817616c76610b765b67f70a449206c2e\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Oct 19 11:18:10 2021 +0800\r\n",
      "\r\n",
      "    log1p\r\n",
      "\r\n",
      "\u001b[33mcommit d82b523e99c59938bbac626825eafb81f873d56f\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 18 17:21:06 2021 +0800\r\n",
      "\r\n",
      "    调参设置为false.\r\n",
      "\r\n",
      "\u001b[33mcommit 9501dc35a4cc479e9309f92085966e2d41892048\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 18 17:08:22 2021 +0800\r\n",
      "\r\n",
      "    modify xgboost parameters.\r\n",
      "\r\n",
      "\u001b[33mcommit 9a07bb9af8dd03d28f170729ab375c8d08049ec7\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 18 15:27:07 2021 +0800\r\n",
      "\r\n",
      "    debug: MSE的计算.\r\n",
      "\r\n",
      "\u001b[33mcommit eca729a754ad975a367fc901d8da56e1bfdf7659\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 18 15:22:26 2021 +0800\r\n",
      "\r\n",
      "    优化xgb模型的log.\r\n",
      "\r\n",
      "\u001b[33mcommit c1d3c3fb2669f6eb3613de6753048a784adb4d51\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 18 09:10:36 2021 +0800\r\n",
      "\r\n",
      "    regressor, 增加metric配置.\r\n",
      "\r\n",
      "\u001b[33mcommit 96fb99defa13a3f379a56220984995a84c07fadb\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 18 08:38:59 2021 +0800\r\n",
      "\r\n",
      "    优化pipeline, 特征合并.\r\n",
      "\r\n",
      "\u001b[33mcommit 39373c28489f349937c8b0c56cc3825dcc885f83\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Oct 18 08:30:27 2021 +0800\r\n",
      "\r\n",
      "    更新pipeline.\r\n",
      "\r\n",
      "\u001b[33mcommit b2637e3039460d09224abd0d9e7ec129be625965\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Oct 17 13:50:59 2021 +0800\r\n",
      "\r\n",
      "    add case: Allstate.\r\n",
      "\r\n",
      "\u001b[33mcommit 2f39a6de3d26508ac694db8ddfe38d5a70da414f\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Oct 17 10:44:58 2021 +0800\r\n",
      "\r\n",
      "    debug feature: fe_time.\r\n",
      "\r\n",
      "\u001b[33mcommit 6c07c52fd8aad4cc83e8eb5acebe6df2954e3189\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Oct 17 10:43:48 2021 +0800\r\n",
      "\r\n",
      "    debug, fe_time.\r\n",
      "\r\n",
      "\u001b[33mcommit c63350a2ed12c0018df3e03bb6f03d15654e9982\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Oct 17 10:39:56 2021 +0800\r\n",
      "\r\n",
      "    add feature: fe_time.\r\n",
      "\r\n",
      "\u001b[33mcommit 9237f8571dae8f221d9ea31e27b40a1d58de8cf5\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Oct 17 10:28:03 2021 +0800\r\n",
      "\r\n",
      "    debug.\r\n",
      "\r\n",
      "\u001b[33mcommit 83cec4c0a6eb8377076ac351e4704dc29feeff55\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Oct 17 10:24:01 2021 +0800\r\n",
      "\r\n",
      "    debug datetime feature type.\r\n",
      "\r\n",
      "\u001b[33mcommit 926144780b3966e039e165096427f3c413ff8c0e\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Oct 17 10:21:07 2021 +0800\r\n",
      "\r\n",
      "    debug detect datetime feature type.\r\n",
      "\r\n",
      "\u001b[33mcommit 74abc938221fd49d313ffd5cb66318d2cbe154af\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sat Oct 16 21:29:51 2021 +0800\r\n",
      "\r\n",
      "    debug.\r\n",
      "\r\n",
      "\u001b[33mcommit 21f25e9a66a05caac6ebeb3ae9314a88edd9116a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sat Oct 16 18:00:42 2021 +0800\r\n",
      "\r\n",
      "    重命名1-1拼表特征的列名\r\n",
      "\r\n",
      "\u001b[33mcommit 0c1cf483685645f6ceeea03fe1f6701cffa4850e\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sat Oct 16 08:18:18 2021 +0800\r\n",
      "\r\n",
      "    add grocery_sales results and demos.\r\n",
      "\r\n",
      "\u001b[33mcommit bb8deaa13fbd261e8dc98f04dc2574b05b985bcb\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Oct 8 20:07:45 2021 +0800\r\n",
      "\r\n",
      "    增加ventilator和Santander上分点总结；更新ventilator结果；\r\n",
      "\r\n",
      "\u001b[33mcommit de84f292155bce87fe44469502ba9263de934be1\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Sep 30 08:20:04 2021 +0800\r\n",
      "\r\n",
      "    优化diff和shift特征\r\n",
      "\r\n",
      "\u001b[33mcommit 50ab24fbcff59134d874e461652b92f60aa402d0\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Sep 30 08:19:38 2021 +0800\r\n",
      "\r\n",
      "    add cumsum feature\r\n",
      "\r\n",
      "\u001b[33mcommit 91f4e68e545fc91284219855cffc2e7c679ce087\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Sep 29 20:44:05 2021 +0800\r\n",
      "\r\n",
      "    debug for denoising autoencoder.\r\n",
      "\r\n",
      "\u001b[33mcommit d9870fb170c6f837818ff406d13fe25634d44a8a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Sep 29 20:33:36 2021 +0800\r\n",
      "\r\n",
      "    add shift feature; add diff featuers; add ventilator demos.\r\n",
      "\r\n",
      "\u001b[33mcommit 70a9128f32a988454c49651716b7afbd109fa929\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 24 19:51:55 2021 +0800\r\n",
      "\r\n",
      "    updata santander result.\r\n",
      "\r\n",
      "\u001b[33mcommit db903d295e59e38c749ac45381347561caf40545\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 24 17:18:50 2021 +0800\r\n",
      "\r\n",
      "    init FeatureDenoisingAutoencoder.\r\n",
      "\r\n",
      "\u001b[33mcommit 08cc35212f2e0d463724cc6b27597253f1329105\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 24 17:13:57 2021 +0800\r\n",
      "\r\n",
      "    denoising autoencoder特征.\r\n",
      "\r\n",
      "\u001b[33mcommit 410ef74d5756873d890caa07274b8ee0e1d746c5\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Sep 23 16:58:48 2021 +0800\r\n",
      "\r\n",
      "    modify README.md; add stumbleupon demo.\r\n",
      "\r\n",
      "\u001b[33mcommit 755a5bc45c760238e6e4309f1985959a4c5364b1\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Sep 22 11:37:03 2021 +0800\r\n",
      "\r\n",
      "    全流程中加入nlp特征; 增加stumbleupon的demo.\r\n",
      "\r\n",
      "\u001b[33mcommit 4d39f318551660fe139878a5c88db1d3aaac0b97\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 17 20:53:19 2021 +0800\r\n",
      "\r\n",
      "    一键执行逻辑中增加nlp特征.\r\n",
      "\r\n",
      "\u001b[33mcommit 55f67716c7a50366d45249c0f9d3ea4166c13c41\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 17 20:48:51 2021 +0800\r\n",
      "\r\n",
      "    1. 增加nlp特征; 2. 增加StumbleUpon案例结果.\r\n",
      "\r\n",
      "\u001b[33mcommit 74f9d849c0eadde63ff71ec5a79631d3af08fd7b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 17 16:05:38 2021 +0800\r\n",
      "\r\n",
      "    增加文本类型.\r\n",
      "\r\n",
      "\u001b[33mcommit e45f0b7c3855514177328519b574e7b969812ce4\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Sep 15 11:08:54 2021 +0800\r\n",
      "\r\n",
      "    modify README.md\r\n",
      "\r\n",
      "\u001b[33mcommit 7d4b34cc7cc584b7643d17b7c0bca7c8f92dbf7b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Sep 14 17:19:09 2021 +0800\r\n",
      "\r\n",
      "    debug: 分解特征.\r\n",
      "\r\n",
      "\u001b[33mcommit 77bc1761c50bda21fce5bc21fff4115dd3d11443\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Sep 14 16:08:56 2021 +0800\r\n",
      "\r\n",
      "    init FeatureDimensionReduction.\r\n",
      "\r\n",
      "\u001b[33mcommit f8f277e7b8cd314127bb622484855fe8aac78cf5\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Sep 14 15:37:52 2021 +0800\r\n",
      "\r\n",
      "    降维特征.\r\n",
      "\r\n",
      "\u001b[33mcommit f56b86583be17aac48f69067644bec8bf624b57b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Sep 14 11:26:25 2021 +0800\r\n",
      "\r\n",
      "    debug: xbg二分类模型改成预测概率而非硬分类.\r\n",
      "\r\n",
      "\u001b[33mcommit ff8c20b9742ad36c31ab5baec8c1f1211c61e8bf\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Sep 14 10:49:10 2021 +0800\r\n",
      "\r\n",
      "    add demo: kaggle springleaf\r\n",
      "\r\n",
      "\u001b[33mcommit ac4dcab227d8bcc17ecc2d69e5e0486a5f13d5f7\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Sep 14 10:37:49 2021 +0800\r\n",
      "\r\n",
      "    modify README.md: 更新springleaf一键执行结果.\r\n",
      "\r\n",
      "\u001b[33mcommit 26eb6bea33b8e8428ee1afcd4403ccae2948724e\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 10 19:10:12 2021 +0800\r\n",
      "\r\n",
      "    debug: task_type\r\n",
      "\r\n",
      "\u001b[33mcommit 58820caad3acc6d2b1fae7a81e051d3fb30f13d3\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 10 14:35:18 2021 +0800\r\n",
      "\r\n",
      "    优化log\r\n",
      "\r\n",
      "\u001b[33mcommit 51704abfd80114578eab318356cc77b1ef46e18b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Sep 10 14:30:35 2021 +0800\r\n",
      "\r\n",
      "    1. 增加case: kaggle springleaf;\r\n",
      "    2. 优化autox get_submit逻辑\r\n",
      "\r\n",
      "\u001b[33mcommit 6455e62326d344b33a37f100b4fecf2dcb637c8a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Sep 2 16:50:56 2021 +0800\r\n",
      "\r\n",
      "    增加ieee结果和pipeline demo.\r\n",
      "\r\n",
      "\u001b[33mcommit 74d679c47ae2e0639d02b994e6cf1f6f84dfe560\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Sep 2 15:44:15 2021 +0800\r\n",
      "\r\n",
      "    debug for feature_filter.\r\n",
      "\r\n",
      "\u001b[33mcommit 75c9510e049cfdbaa57f07b3f4306f1a161fccea\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Sep 2 14:48:42 2021 +0800\r\n",
      "\r\n",
      "    优化groupby key筛选条件.\r\n",
      "\r\n",
      "\u001b[33mcommit ff2bb3fb04a5b84feca94b26de7ac6048cc36c7b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Sep 1 17:37:03 2021 +0800\r\n",
      "\r\n",
      "    debug: fe_rank\r\n",
      "\r\n",
      "\u001b[33mcommit 1fe1f1732606a5dbf007270c2dbae1711b5a72b6\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Sep 1 16:27:16 2021 +0800\r\n",
      "\r\n",
      "    debug: 拼接1-1简单表.\r\n",
      "\r\n",
      "\u001b[33mcommit c7b7964fb2713118d6e85d0ef22a384f924143be\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Sep 1 16:04:23 2021 +0800\r\n",
      "\r\n",
      "    增加功能，拼接1-1简单表;\r\n",
      "    kaggle_ieee, demo;\r\n",
      "    modify README.md.\r\n",
      "\r\n",
      "\u001b[33mcommit 21457fafb8d01644cfc668d0aab8d463a8cda3e7\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 31 15:45:24 2021 +0800\r\n",
      "\r\n",
      "    modify README\r\n",
      "\r\n",
      "\u001b[33mcommit 2c2cf54574a8a9c6c21f5452ed5f5bcf4b3ae7ef\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 31 15:25:09 2021 +0800\r\n",
      "\r\n",
      "    modify README_EN.md\r\n",
      "\r\n",
      "\u001b[33mcommit 3da3ba229d78d81d844226c0d584d1da6572109a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 30 20:34:13 2021 +0800\r\n",
      "\r\n",
      "    init Fe_rank.\r\n",
      "\r\n",
      "\u001b[33mcommit 7f2e3717b84ebdd223037ce2ac63740d46571a9a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 30 17:32:12 2021 +0800\r\n",
      "\r\n",
      "    add rank feature.\r\n",
      "\r\n",
      "\u001b[33mcommit b3fa6719c0052b964f0d74a6bf9a8941c488d915\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 30 10:43:40 2021 +0800\r\n",
      "\r\n",
      "    add demo: kaggle house price.\r\n",
      "\r\n",
      "\u001b[33mcommit 59b7d261d059f84a291fda6013f7eeffdcae9987\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Aug 29 08:03:49 2021 +0800\r\n",
      "\r\n",
      "    modify README_EN.md, 跳转链接.\r\n",
      "\r\n",
      "\u001b[33mcommit f2581a891bc919b3c0f11fb4c7cf700ecedc2f73\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Aug 29 07:57:44 2021 +0800\r\n",
      "\r\n",
      "    modify README_EN.md\r\n",
      "\r\n",
      "\u001b[33mcommit 50b186979fa431cdfaed38a508f1a134d2e7e0f1\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 27 17:44:44 2021 +0800\r\n",
      "\r\n",
      "    modify README.md, 新增kaggle house price数据集.\r\n",
      "\r\n",
      "\u001b[33mcommit 08e2dc8e069ffdd5f5dea5870af971d7b2cbe1df\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 27 16:39:01 2021 +0800\r\n",
      "\r\n",
      "    install_requires, 忽略tabnet.\r\n",
      "\r\n",
      "\u001b[33mcommit 89611b6d10d4492cde5b6f390d2c4077977b66f1\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 27 15:52:55 2021 +0800\r\n",
      "\r\n",
      "    xgb打印轮次设置为100\r\n",
      "\r\n",
      "\u001b[33mcommit 984d81a49150edd80d177137ceb748591db1a04d\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 27 15:42:14 2021 +0800\r\n",
      "\r\n",
      "    回归模型调参,修改验证集切分方式.\r\n",
      "\r\n",
      "\u001b[33mcommit 7fb12ebf09d04b0a617896ab1a63474fc8bb55a5\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 27 15:09:13 2021 +0800\r\n",
      "\r\n",
      "    优化特征类型识别.\r\n",
      "\r\n",
      "\u001b[33mcommit 39f94e1e4cd82c6d148556d80bb47146bfc8d539\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 27 14:58:53 2021 +0800\r\n",
      "\r\n",
      "    优化特征类型识别.\r\n",
      "\r\n",
      "\u001b[33mcommit 9f78099656c32f335a9134ea6358f131891699f0\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 26 17:02:52 2021 +0800\r\n",
      "\r\n",
      "    modify readme.\r\n",
      "\r\n",
      "\u001b[33mcommit 656c91218b289fe61cf21855e16b6895acec2c78\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 24 15:21:30 2021 +0800\r\n",
      "\r\n",
      "    优化readme,增加zhidemai比赛上分点总结\r\n",
      "\r\n",
      "\u001b[33mcommit 0aa3748f2a06d3639f6afeb94e33bca7d0bdeea8\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 20 14:33:54 2021 +0800\r\n",
      "\r\n",
      "    modify README.md\r\n",
      "\r\n",
      "\u001b[33mcommit 0cfd6d5cf86fee9b1a02b79a94ffd97c0b8a166a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 19 20:14:02 2021 +0800\r\n",
      "\r\n",
      "    setup安装包增加tabnet.\r\n",
      "\r\n",
      "\u001b[33mcommit 84af1a14acd1e121bfcae1afde06f00b80df614d\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Aug 18 11:52:08 2021 +0800\r\n",
      "\r\n",
      "    debug: tabnet的调参参数配置\r\n",
      "\r\n",
      "\u001b[33mcommit d185a546260127b5faffab105d3a2c0eaafb69bc\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 21:19:57 2021 +0800\r\n",
      "\r\n",
      "    tabnet, reshape y\r\n",
      "\r\n",
      "\u001b[33mcommit 388c00372b8f4af10bed4212ccc1bdf6e3f54275\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 21:01:59 2021 +0800\r\n",
      "\r\n",
      "    debug, tabnet.\r\n",
      "\r\n",
      "\u001b[33mcommit 18bc69af5802750f5ec23312e7ab649ddc25cfa8\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 20:25:31 2021 +0800\r\n",
      "\r\n",
      "    tabnet: 缺失值用中位数填充.\r\n",
      "\r\n",
      "\u001b[33mcommit 5a88c0ff98338dddb8d1406eb1bfd0d2a72f2121\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 19:37:39 2021 +0800\r\n",
      "\r\n",
      "    优化tabnet\r\n",
      "\r\n",
      "\u001b[33mcommit 1e07509db81c4c6e3222ae6697ef5c119a2eef31\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 16:08:43 2021 +0800\r\n",
      "\r\n",
      "    bagging中增加tabnet模型\r\n",
      "\r\n",
      "\u001b[33mcommit fbed7f9fb73e4a9ac143398902fd042a6fa54247\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 16:05:36 2021 +0800\r\n",
      "\r\n",
      "    tabnet regressor\r\n",
      "\r\n",
      "\u001b[33mcommit 8e89749c53b5ebddc86cb9b8ace762f7b3854841\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 15:21:32 2021 +0800\r\n",
      "\r\n",
      "    debug模式下缩短调参时间。\r\n",
      "\r\n",
      "\u001b[33mcommit 9c70574e38194766cbecca0822b2cdd48867144b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 15:13:04 2021 +0800\r\n",
      "\r\n",
      "    debug模型打印日志.\r\n",
      "\r\n",
      "\u001b[33mcommit 7b18599f132699f056470761701968931da3f7a9\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Tue Aug 17 15:04:15 2021 +0800\r\n",
      "\r\n",
      "    增加debug模式，方便快速调试.\r\n",
      "\r\n",
      "\u001b[33mcommit d2d332b0bb5432e0ef49df01455c81a2644e7271\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 16 08:00:11 2021 +0800\r\n",
      "\r\n",
      "    auto_label_encoder,设置silence_cols\r\n",
      "\r\n",
      "\u001b[33mcommit 1b8ced5337d1826ff6dadec235c8cc5a00cb4e89\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sun Aug 15 08:42:35 2021 +0800\r\n",
      "\r\n",
      "    内存优化.\r\n",
      "\r\n",
      "\u001b[33mcommit fab33ba59407c96ac146c4ad6865a32f06b8fa34\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Aug 11 10:53:16 2021 +0800\r\n",
      "\r\n",
      "    增加二分类模型.\r\n",
      "\r\n",
      "\u001b[33mcommit 373c58eb950fbc364581d75b493ecaa1735079ed\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 9 15:28:00 2021 +0800\r\n",
      "\r\n",
      "    识别任务类型\r\n",
      "\r\n",
      "\u001b[33mcommit 44755fa33a1a6f59239786ab80de9d521c72b68c\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 6 16:19:49 2021 +0800\r\n",
      "\r\n",
      "    lgb, Verbose = 100\r\n",
      "\r\n",
      "\u001b[33mcommit e18d2dd86b63d4e253b3ad67017aeb82546cead3\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 6 13:21:03 2021 +0800\r\n",
      "\r\n",
      "    优化CrossXgbRegression.\r\n",
      "\r\n",
      "\u001b[33mcommit 6264775e9faec6d832cdb59819bca6534ced7401\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 6 11:41:03 2021 +0800\r\n",
      "\r\n",
      "    优化CrossXgbRegression: X进行StandardScaler, debug.\r\n",
      "\r\n",
      "\u001b[33mcommit 53135e94a4a0a0b079bf83df3db8e687e5ce0dc5\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 6 11:20:31 2021 +0800\r\n",
      "\r\n",
      "    优化CrossXgbRegression: X进行StandardScaler\r\n",
      "\r\n",
      "\u001b[33mcommit f0ac9242246a87dcee3e660c971cc33c5246f0bb\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Fri Aug 6 10:38:23 2021 +0800\r\n",
      "\r\n",
      "    优化CrossXgbRegression\r\n",
      "\r\n",
      "\u001b[33mcommit 75156d600497bb0910a8026aa87b2e7dc964ba79\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 22:34:31 2021 +0800\r\n",
      "\r\n",
      "    xgb model: tree_method='gpu_hist'\r\n",
      "\r\n",
      "\u001b[33mcommit afb229aa554f632badee1a171c28c991968eb331\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 20:58:11 2021 +0800\r\n",
      "\r\n",
      "    模型部分使用xgb和lgb融合\r\n",
      "\r\n",
      "\u001b[33mcommit 338ee3069d797cc10fe6d0f8a38afdc308cfdc71\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 20:29:23 2021 +0800\r\n",
      "\r\n",
      "    del temp.py\r\n",
      "\r\n",
      "\u001b[33mcommit c0dbe0887b53f7e8f02a6a3e9bcde803059ae973\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 19:53:05 2021 +0800\r\n",
      "\r\n",
      "    debug: X.iloc\r\n",
      "\r\n",
      "\u001b[33mcommit 2b806aedc6a2086268b1dadac4111ef1b1b1d83b\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 19:23:17 2021 +0800\r\n",
      "\r\n",
      "    debug: xgb regressor\r\n",
      "\r\n",
      "\u001b[33mcommit 5afbd56bdfb29e33d52d908a03c85508ad4e3d08\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 17:13:38 2021 +0800\r\n",
      "\r\n",
      "    xgboost不使用gpu_hist\r\n",
      "\r\n",
      "\u001b[33mcommit 1c56af83e7f90ec4fcc594fd87dcd0f6b9abaf8c\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 17:10:11 2021 +0800\r\n",
      "\r\n",
      "    xgboost不使用gpu\r\n",
      "\r\n",
      "\u001b[33mcommit b25c37c96115d5845841df1781eabbf4345af621\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 16:55:16 2021 +0800\r\n",
      "\r\n",
      "    增加xgb模型.\r\n",
      "\r\n",
      "\u001b[33mcommit 9af12e41e9a0a3295e63a0fe17988261e63050ee\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Aug 5 11:02:14 2021 +0800\r\n",
      "\r\n",
      "    get_submit, 优化模型训练部分\r\n",
      "\r\n",
      "\u001b[33mcommit d127c4e4a9b17f0244ec14af11482db60c261d21\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Aug 4 16:29:34 2021 +0800\r\n",
      "\r\n",
      "    debug: log输出.\r\n",
      "\r\n",
      "\u001b[33mcommit 3178f49c0ae81f5d9ba084d5dad1563804f457a4\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Aug 4 15:24:57 2021 +0800\r\n",
      "\r\n",
      "    增加模型调参功能.\r\n",
      "\r\n",
      "\u001b[33mcommit eb97b2420853e8e0fddd55343c6029ee6da8b4b3\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Wed Aug 4 15:02:27 2021 +0800\r\n",
      "\r\n",
      "    debug: concat_train_test操作在自动特征类型识别之后.\r\n",
      "\r\n",
      "\u001b[33mcommit 3452d8831ebe8d33f717ad16452109680aa8ef1f\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 2 20:00:16 2021 +0800\r\n",
      "\r\n",
      "    调整target encoding的阈值.\r\n",
      "\r\n",
      "\u001b[33mcommit 0d66de573edd6485725d603620c1344bf41e6222\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 2 19:48:30 2021 +0800\r\n",
      "\r\n",
      "    debug:del_targetencoding_cols去重.\r\n",
      "\r\n",
      "\u001b[33mcommit dc4df8ef3a70a0532578ee6e043223e1f219b60d\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 2 19:45:35 2021 +0800\r\n",
      "\r\n",
      "    debug: del_targetencoding_cols去重.\r\n",
      "\r\n",
      "\u001b[33mcommit 4ecb985b6d6b46145743338c2ed3bd28e3f7977f\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 2 19:34:08 2021 +0800\r\n",
      "\r\n",
      "    debug.\r\n",
      "\r\n",
      "\u001b[33mcommit a156ca854d341072d23df29623f82c819e5c5b81\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Aug 2 19:31:32 2021 +0800\r\n",
      "\r\n",
      "    target encoding特征筛选：test做了target encoding之后，有值的部分要大于90%\r\n",
      "\r\n",
      "\u001b[33mcommit ba93d457a017bb65bf3dc8d4676cea232a48c88c\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Jul 26 17:36:55 2021 +0800\r\n",
      "\r\n",
      "    内存优化, 优化log.\r\n",
      "\r\n",
      "\u001b[33mcommit 5cda17ae252012184192ba10ba941b7eabd1946d\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Jul 26 17:31:49 2021 +0800\r\n",
      "\r\n",
      "    内存优化.\r\n",
      "\r\n",
      "\u001b[33mcommit 0714e370615f919b92995b96eeea79dc475f1064\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Jul 26 14:52:50 2021 +0800\r\n",
      "\r\n",
      "    target encoding feature: 默认使用统计信息进行特征筛选\r\n",
      "\r\n",
      "\u001b[33mcommit cb928b678996a700a8e2c37ae725d5baab573558\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Jul 26 14:45:42 2021 +0800\r\n",
      "\r\n",
      "    target encoding feature: 优化统计信息筛选阈值\r\n",
      "\r\n",
      "\u001b[33mcommit e2a3e989e4b8b911663d871decfdcff05c818f45\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Mon Jul 26 14:39:50 2021 +0800\r\n",
      "\r\n",
      "    debug target encoding feature.\r\n",
      "\r\n",
      "\u001b[33mcommit 855c6c962ac0e9a716c9bc35441fee35bb89bf65\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Sat Jul 24 10:25:33 2021 +0800\r\n",
      "\r\n",
      "    add license file\r\n",
      "\r\n",
      "\u001b[33mcommit b5297ac2334e9b6d008d0e2d1c7a7e6b7dd61b78\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Jul 22 15:20:24 2021 +0800\r\n",
      "\r\n",
      "    modify README.md;\r\n",
      "    增加zhidemai_automl.ipynb.\r\n",
      "\r\n",
      "\u001b[33mcommit 8fb15db690010c060a11e3d28d5a9fdaa268113a\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Jul 22 14:36:09 2021 +0800\r\n",
      "\r\n",
      "    del sub files.\r\n",
      "\r\n",
      "\u001b[33mcommit 74ef3c0664934bb0033c178e50df9fee0986df55\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Jul 22 14:29:32 2021 +0800\r\n",
      "\r\n",
      "    first commit\r\n",
      "\r\n",
      "\u001b[33mcommit 4d75036cbf5db2927ba3233a9cdda4a32c022d85\u001b[m\r\n",
      "Author: poteman <946691288@qq.com>\r\n",
      "Date:   Thu Jul 22 14:26:45 2021 +0800\r\n",
      "\r\n",
      "    first commit\r\n"
     ]
    }
   ],
   "source": [
    "!git log"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9185f791",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T02:37:08.311655Z",
     "start_time": "2021-11-15T02:37:03.644172Z"
    }
   },
   "outputs": [],
   "source": [
    "from autox import AutoX\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fa24e429",
   "metadata": {},
   "source": [
    "## 配置数据信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cb67a86f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T02:37:08.325756Z",
     "start_time": "2021-11-15T02:37:08.315299Z"
    }
   },
   "outputs": [],
   "source": [
    "# Select the dataset; `path` is its directory under ../data\n",
    "data_name = 'walmart_recruiting'\n",
    "path = f'../data/{data_name}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e872528b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T02:37:08.338315Z",
     "start_time": "2021-11-15T02:37:08.328430Z"
    }
   },
   "outputs": [],
   "source": [
    "# 数据表对应的数据列特征类型\n",
    "feature_type = {\n",
    "  \"stores.csv\": {\n",
    "        \"Store\": \"cat\",\n",
    "        \"Type\": \"cat\",\n",
    "        \"Size\": \"num\"\n",
    "    },\n",
    "    \"train.csv\": {\n",
    "        \"Store\": \"cat\",\n",
    "        \"Dept\": \"cat\",\n",
    "        \"Date\": \"datetime\",\n",
    "        \"Weekly_Sales\": \"num\",\n",
    "        \"IsHoliday\": \"num\"\n",
    "    },\n",
    "    \"features.csv\": {\n",
    "        \"Store\": \"cat\",\n",
    "        \"Date\": \"datetime\",\n",
    "        \"Temperature\": \"num\",\n",
    "        \"Fuel_Price\": \"num\",\n",
    "        \"MarkDown1\": \"num\",\n",
    "        \"MarkDown2\": \"num\",\n",
    "        \"MarkDown3\": \"num\",\n",
    "        \"MarkDown4\": \"num\",\n",
    "        \"MarkDown5\": \"num\",\n",
    "        \"CPI\": \"num\",\n",
    "        \"Unemployment\": \"num\",\n",
    "        \"IsHoliday\": \"num\"\n",
    "    },\n",
    "    \"test.csv\": {\n",
    "        \"Store\": \"cat\",\n",
    "        \"Dept\": \"cat\",\n",
    "        \"Date\": \"datetime\",\n",
    "        \"IsHoliday\": \"num\"\n",
    "    }\n",
    "}   \n",
    "\n",
    "relations = [\n",
    "    {\n",
    "            \"related_to_main_table\": \"true\", # 是否为和主表的关系\n",
    "            \"left_entity\": \"train.csv\",  # 左表名字\n",
    "            \"left_on\": [\"Store\"],  # 左表拼表键\n",
    "            \"right_entity\": \"stores.csv\",  # 右表名字\n",
    "            \"right_on\": [\"Store\"], # 右表拼表键\n",
    "            \"type\": \"1-1\" # 左表与右表的连接关系\n",
    "        },\n",
    "    {\n",
    "            \"related_to_main_table\": \"true\", # 是否为和主表的关系\n",
    "            \"left_entity\": \"test.csv\",  # 左表名字\n",
    "            \"left_on\": [\"Store\"],  # 左表拼表键\n",
    "            \"right_entity\": \"stores.csv\",  # 右表名字\n",
    "            \"right_on\": [\"Store\"], # 右表拼表键\n",
    "            \"type\": \"1-1\" # 左表与右表的连接关系\n",
    "        },\n",
    "    {\n",
    "            \"related_to_main_table\": \"true\", # 是否为和主表的关系\n",
    "            \"left_entity\": \"train.csv\",  # 左表名字\n",
    "            \"left_on\": [\"Store\", \"Date\"],  # 左表拼表键\n",
    "            \"right_entity\": \"features.csv\",  # 右表名字\n",
    "            \"right_on\": [\"Store\", \"Date\"], # 右表拼表键\n",
    "            \"type\": \"1-1\" # 左表与右表的连接关系\n",
    "        },\n",
    "    {\n",
    "            \"related_to_main_table\": \"true\", # 是否为和主表的关系\n",
    "            \"left_entity\": \"test.csv\",  # 左表名字\n",
    "            \"left_on\": [\"Store\", \"Date\"],  # 左表拼表键\n",
    "            \"right_entity\": \"features.csv\",  # 右表名字\n",
    "            \"right_on\": [\"Store\", \"Date\"], # 右表拼表键\n",
    "            \"type\": \"1-1\" # 左表与右表的连接关系\n",
    "        }\n",
    "]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b722824a",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T02:37:09.179538Z",
     "start_time": "2021-11-15T02:37:08.340504Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   INFO ->  [+] read stores.csv\n",
      "   INFO ->  Memory usage of dataframe is 0.00 MB\n",
      "   INFO ->  Memory usage after optimization is: 0.00 MB\n",
      "   INFO ->  Decreased by 56.1%\n",
      "   INFO ->  table = stores.csv, shape = (45, 3)\n",
      "   INFO ->  [+] read train.csv\n",
      "   INFO ->  Memory usage of dataframe is 13.27 MB\n",
      "   INFO ->  Memory usage after optimization is: 4.03 MB\n",
      "   INFO ->  Decreased by 69.7%\n",
      "   INFO ->  table = train.csv, shape = (421570, 5)\n",
      "   INFO ->  [+] read features.csv\n",
      "   INFO ->  Memory usage of dataframe is 0.70 MB\n",
      "   INFO ->  Memory usage after optimization is: 0.26 MB\n",
      "   INFO ->  Decreased by 62.1%\n",
      "   INFO ->  table = features.csv, shape = (8190, 12)\n",
      "   INFO ->  [+] read test.csv\n",
      "   INFO ->  Memory usage of dataframe is 2.74 MB\n",
      "   INFO ->  Memory usage after optimization is: 0.55 MB\n",
      "   INFO ->  Decreased by 79.9%\n",
      "   INFO ->  table = test.csv, shape = (115064, 4)\n",
      "   INFO ->  [+] read sampleSubmission.csv\n",
      "   INFO ->  Memory usage of dataframe is 1.76 MB\n",
      "   INFO ->  Memory usage after optimization is: 5.46 MB\n",
      "   INFO ->  Decreased by -210.8%\n",
      "   INFO ->  table = sampleSubmission.csv, shape = (115064, 2)\n"
     ]
    }
   ],
   "source": [
    "# Create the AutoX instance in time-series mode (time_series=True, ts_unit='W')\n",
    "autox = AutoX(\n",
    "    target='Weekly_Sales',\n",
    "    train_name='train.csv',\n",
    "    test_name='test.csv',\n",
    "    id=['Store', 'Dept'],\n",
    "    path=path,\n",
    "    time_series=True,\n",
    "    ts_unit='W',\n",
    "    time_col='Date',\n",
    "    feature_type=feature_type,\n",
    "    relations=relations,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fc6898fc",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T04:07:23.540076Z",
     "start_time": "2021-11-15T02:37:09.181359Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   INFO ->  start feature engineer\n",
      "   INFO ->  feature engineer: one2M\n",
      "   INFO ->  featureOne2M ops: {}\n",
      "   INFO ->  ignore featureOne2M\n",
      "   INFO ->  feature engineer: time\n",
      "   INFO ->  featureTime ops: ['Date']\n",
      "100%|██████████| 1/1 [00:00<00:00,  1.79it/s]\n",
      "   INFO ->  feature engineer: ShiftTS\n",
      "   INFO ->  featureShiftTS ops: ['Weekly_Sales', 'IsHoliday', 'stores.csv__Size', 'features.csv__Temperature', 'features.csv__Fuel_Price', 'features.csv__MarkDown1', 'features.csv__MarkDown2', 'features.csv__MarkDown3', 'features.csv__MarkDown4', 'features.csv__MarkDown5', 'features.csv__CPI', 'features.csv__Unemployment', 'features.csv__IsHoliday']\n",
      "   INFO ->  featureShiftTS lags: [39, 40, 41, 42]\n",
      "13it [00:52,  4.02s/it]\n",
      "   INFO ->  feature engineer: RollingStatTS\n",
      "   INFO ->  featureRollingStatTS ops: ['Weekly_Sales', 'IsHoliday', 'stores.csv__Size', 'features.csv__Temperature', 'features.csv__Fuel_Price', 'features.csv__MarkDown1', 'features.csv__MarkDown2', 'features.csv__MarkDown3', 'features.csv__MarkDown4', 'features.csv__MarkDown5', 'features.csv__CPI', 'features.csv__Unemployment', 'features.csv__IsHoliday']\n",
      "   INFO ->  featureRollingStatTS windows: [42, 43, 44]\n",
      "100%|██████████| 13/13 [06:28<00:00, 29.86s/it]\n",
      "   INFO ->  feature engineer: ExpWeightedMean\n",
      "   INFO ->  featureExpWeightedMean ops: ['Weekly_Sales', 'IsHoliday', 'stores.csv__Size', 'features.csv__Temperature', 'features.csv__Fuel_Price', 'features.csv__MarkDown1', 'features.csv__MarkDown2', 'features.csv__MarkDown3', 'features.csv__MarkDown4', 'features.csv__MarkDown5', 'features.csv__CPI', 'features.csv__Unemployment', 'features.csv__IsHoliday']\n",
      "   INFO ->  featureExpWeightedMean lags: [39, 40, 41, 42]\n",
      "13it [01:18,  6.05s/it]\n",
      "100%|██████████| 17/17 [00:00<00:00, 348.65it/s]\n",
      "   INFO ->  label_encoder_list: ['stores.csv__Type']\n",
      "   INFO ->  feature combination\n",
      "100%|██████████| 5/5 [00:02<00:00,  2.32it/s]\n",
      "   INFO ->  shape of FE_all: (536634, 326), shape of train: (421570, 326), shape of test: (115064, 326)\n",
      "   INFO ->  feature filter\n",
      "100%|██████████| 325/325 [00:02<00:00, 111.14it/s]\n",
      "   INFO ->  filtered features: ['Store', 'Dept', 'Weekly_Sales']\n",
      "   INFO ->  used_features: ['IsHoliday', 'stores.csv__Type', 'stores.csv__Size', 'features.csv__Temperature', 'features.csv__Fuel_Price', 'features.csv__MarkDown1', 'features.csv__MarkDown2', 'features.csv__MarkDown3', 'features.csv__MarkDown4', 'features.csv__MarkDown5', 'features.csv__CPI', 'features.csv__Unemployment', 'features.csv__IsHoliday', 'Date_year', 'Date_month', 'Date_day', 'Date_hour', 'Date_weekofyear', 'Date_dayofweek', 'Date_is_wknd', 'Date_quarter', 'Date_is_month_start', 'Date_is_month_end', 'Store__Dept__Weekly_Sales__lag_39', 'Store__Dept__Weekly_Sales__lag_40', 'Store__Dept__Weekly_Sales__lag_41', 'Store__Dept__Weekly_Sales__lag_42', 'Store__Dept__IsHoliday__lag_39', 'Store__Dept__IsHoliday__lag_40', 'Store__Dept__IsHoliday__lag_41', 'Store__Dept__IsHoliday__lag_42', 'Store__Dept__stores.csv__Size__lag_39', 'Store__Dept__stores.csv__Size__lag_40', 'Store__Dept__stores.csv__Size__lag_41', 'Store__Dept__stores.csv__Size__lag_42', 'Store__Dept__features.csv__Temperature__lag_39', 'Store__Dept__features.csv__Temperature__lag_40', 'Store__Dept__features.csv__Temperature__lag_41', 'Store__Dept__features.csv__Temperature__lag_42', 'Store__Dept__features.csv__Fuel_Price__lag_39', 'Store__Dept__features.csv__Fuel_Price__lag_40', 'Store__Dept__features.csv__Fuel_Price__lag_41', 'Store__Dept__features.csv__Fuel_Price__lag_42', 'Store__Dept__features.csv__MarkDown1__lag_39', 'Store__Dept__features.csv__MarkDown1__lag_40', 'Store__Dept__features.csv__MarkDown1__lag_41', 'Store__Dept__features.csv__MarkDown1__lag_42', 'Store__Dept__features.csv__MarkDown2__lag_39', 'Store__Dept__features.csv__MarkDown2__lag_40', 'Store__Dept__features.csv__MarkDown2__lag_41', 'Store__Dept__features.csv__MarkDown2__lag_42', 'Store__Dept__features.csv__MarkDown3__lag_39', 'Store__Dept__features.csv__MarkDown3__lag_40', 'Store__Dept__features.csv__MarkDown3__lag_41', 'Store__Dept__features.csv__MarkDown3__lag_42', 'Store__Dept__features.csv__MarkDown4__lag_39', 
'Store__Dept__features.csv__MarkDown4__lag_40', 'Store__Dept__features.csv__MarkDown4__lag_41', 'Store__Dept__features.csv__MarkDown4__lag_42', 'Store__Dept__features.csv__MarkDown5__lag_39', 'Store__Dept__features.csv__MarkDown5__lag_40', 'Store__Dept__features.csv__MarkDown5__lag_41', 'Store__Dept__features.csv__MarkDown5__lag_42', 'Store__Dept__features.csv__CPI__lag_39', 'Store__Dept__features.csv__CPI__lag_40', 'Store__Dept__features.csv__CPI__lag_41', 'Store__Dept__features.csv__CPI__lag_42', 'Store__Dept__features.csv__Unemployment__lag_39', 'Store__Dept__features.csv__Unemployment__lag_40', 'Store__Dept__features.csv__Unemployment__lag_41', 'Store__Dept__features.csv__Unemployment__lag_42', 'Store__Dept__features.csv__IsHoliday__lag_39', 'Store__Dept__features.csv__IsHoliday__lag_40', 'Store__Dept__features.csv__IsHoliday__lag_41', 'Store__Dept__features.csv__IsHoliday__lag_42', 'Store__Dept__Weekly_Sales_roll_mean_42', 'Store__Dept__Weekly_Sales_roll_mean_43', 'Store__Dept__Weekly_Sales_roll_mean_44', 'Store__Dept__Weekly_Sales_roll_std_42', 'Store__Dept__Weekly_Sales_roll_std_43', 'Store__Dept__Weekly_Sales_roll_std_44', 'Store__Dept__Weekly_Sales_roll_median_42', 'Store__Dept__Weekly_Sales_roll_median_43', 'Store__Dept__Weekly_Sales_roll_median_44', 'Store__Dept__Weekly_Sales_roll_max_42', 'Store__Dept__Weekly_Sales_roll_max_43', 'Store__Dept__Weekly_Sales_roll_max_44', 'Store__Dept__Weekly_Sales_roll_min_42', 'Store__Dept__Weekly_Sales_roll_min_43', 'Store__Dept__Weekly_Sales_roll_min_44', 'Store__Dept__IsHoliday_roll_mean_42', 'Store__Dept__IsHoliday_roll_mean_43', 'Store__Dept__IsHoliday_roll_mean_44', 'Store__Dept__IsHoliday_roll_std_42', 'Store__Dept__IsHoliday_roll_std_43', 'Store__Dept__IsHoliday_roll_std_44', 'Store__Dept__IsHoliday_roll_median_42', 'Store__Dept__IsHoliday_roll_median_43', 'Store__Dept__IsHoliday_roll_median_44', 'Store__Dept__IsHoliday_roll_max_42', 'Store__Dept__IsHoliday_roll_max_43', 'Store__Dept__IsHoliday_roll_max_44', 
'Store__Dept__IsHoliday_roll_min_42', 'Store__Dept__IsHoliday_roll_min_43', 'Store__Dept__IsHoliday_roll_min_44', 'Store__Dept__stores.csv__Size_roll_mean_42', 'Store__Dept__stores.csv__Size_roll_mean_43', 'Store__Dept__stores.csv__Size_roll_mean_44', 'Store__Dept__stores.csv__Size_roll_std_42', 'Store__Dept__stores.csv__Size_roll_std_43', 'Store__Dept__stores.csv__Size_roll_std_44', 'Store__Dept__stores.csv__Size_roll_median_42', 'Store__Dept__stores.csv__Size_roll_median_43', 'Store__Dept__stores.csv__Size_roll_median_44', 'Store__Dept__stores.csv__Size_roll_max_42', 'Store__Dept__stores.csv__Size_roll_max_43', 'Store__Dept__stores.csv__Size_roll_max_44', 'Store__Dept__stores.csv__Size_roll_min_42', 'Store__Dept__stores.csv__Size_roll_min_43', 'Store__Dept__stores.csv__Size_roll_min_44', 'Store__Dept__features.csv__Temperature_roll_mean_42', 'Store__Dept__features.csv__Temperature_roll_mean_43', 'Store__Dept__features.csv__Temperature_roll_mean_44', 'Store__Dept__features.csv__Temperature_roll_std_42', 'Store__Dept__features.csv__Temperature_roll_std_43', 'Store__Dept__features.csv__Temperature_roll_std_44', 'Store__Dept__features.csv__Temperature_roll_median_42', 'Store__Dept__features.csv__Temperature_roll_median_43', 'Store__Dept__features.csv__Temperature_roll_median_44', 'Store__Dept__features.csv__Temperature_roll_max_42', 'Store__Dept__features.csv__Temperature_roll_max_43', 'Store__Dept__features.csv__Temperature_roll_max_44', 'Store__Dept__features.csv__Temperature_roll_min_42', 'Store__Dept__features.csv__Temperature_roll_min_43', 'Store__Dept__features.csv__Temperature_roll_min_44', 'Store__Dept__features.csv__Fuel_Price_roll_mean_42', 'Store__Dept__features.csv__Fuel_Price_roll_mean_43', 'Store__Dept__features.csv__Fuel_Price_roll_mean_44', 'Store__Dept__features.csv__Fuel_Price_roll_std_42', 'Store__Dept__features.csv__Fuel_Price_roll_std_43', 'Store__Dept__features.csv__Fuel_Price_roll_std_44', 'Store__Dept__features.csv__Fuel_Price_roll_median_42', 
'Store__Dept__features.csv__Fuel_Price_roll_median_43', 'Store__Dept__features.csv__Fuel_Price_roll_median_44', 'Store__Dept__features.csv__Fuel_Price_roll_max_42', 'Store__Dept__features.csv__Fuel_Price_roll_max_43', 'Store__Dept__features.csv__Fuel_Price_roll_max_44', 'Store__Dept__features.csv__Fuel_Price_roll_min_42', 'Store__Dept__features.csv__Fuel_Price_roll_min_43', 'Store__Dept__features.csv__Fuel_Price_roll_min_44', 'Store__Dept__features.csv__MarkDown1_roll_mean_42', 'Store__Dept__features.csv__MarkDown1_roll_mean_43', 'Store__Dept__features.csv__MarkDown1_roll_mean_44', 'Store__Dept__features.csv__MarkDown1_roll_std_42', 'Store__Dept__features.csv__MarkDown1_roll_std_43', 'Store__Dept__features.csv__MarkDown1_roll_std_44', 'Store__Dept__features.csv__MarkDown1_roll_median_42', 'Store__Dept__features.csv__MarkDown1_roll_median_43', 'Store__Dept__features.csv__MarkDown1_roll_median_44', 'Store__Dept__features.csv__MarkDown1_roll_max_42', 'Store__Dept__features.csv__MarkDown1_roll_max_43', 'Store__Dept__features.csv__MarkDown1_roll_max_44', 'Store__Dept__features.csv__MarkDown1_roll_min_42', 'Store__Dept__features.csv__MarkDown1_roll_min_43', 'Store__Dept__features.csv__MarkDown1_roll_min_44', 'Store__Dept__features.csv__MarkDown2_roll_mean_42', 'Store__Dept__features.csv__MarkDown2_roll_mean_43', 'Store__Dept__features.csv__MarkDown2_roll_mean_44', 'Store__Dept__features.csv__MarkDown2_roll_std_42', 'Store__Dept__features.csv__MarkDown2_roll_std_43', 'Store__Dept__features.csv__MarkDown2_roll_std_44', 'Store__Dept__features.csv__MarkDown2_roll_median_42', 'Store__Dept__features.csv__MarkDown2_roll_median_43', 'Store__Dept__features.csv__MarkDown2_roll_median_44', 'Store__Dept__features.csv__MarkDown2_roll_max_42', 'Store__Dept__features.csv__MarkDown2_roll_max_43', 'Store__Dept__features.csv__MarkDown2_roll_max_44', 'Store__Dept__features.csv__MarkDown2_roll_min_42', 'Store__Dept__features.csv__MarkDown2_roll_min_43', 
'Store__Dept__features.csv__MarkDown2_roll_min_44', 'Store__Dept__features.csv__MarkDown3_roll_mean_42', 'Store__Dept__features.csv__MarkDown3_roll_mean_43', 'Store__Dept__features.csv__MarkDown3_roll_mean_44', 'Store__Dept__features.csv__MarkDown3_roll_std_42', 'Store__Dept__features.csv__MarkDown3_roll_std_43', 'Store__Dept__features.csv__MarkDown3_roll_std_44', 'Store__Dept__features.csv__MarkDown3_roll_median_42', 'Store__Dept__features.csv__MarkDown3_roll_median_43', 'Store__Dept__features.csv__MarkDown3_roll_median_44', 'Store__Dept__features.csv__MarkDown3_roll_max_42', 'Store__Dept__features.csv__MarkDown3_roll_max_43', 'Store__Dept__features.csv__MarkDown3_roll_max_44', 'Store__Dept__features.csv__MarkDown3_roll_min_42', 'Store__Dept__features.csv__MarkDown3_roll_min_43', 'Store__Dept__features.csv__MarkDown3_roll_min_44', 'Store__Dept__features.csv__MarkDown4_roll_mean_42', 'Store__Dept__features.csv__MarkDown4_roll_mean_43', 'Store__Dept__features.csv__MarkDown4_roll_mean_44', 'Store__Dept__features.csv__MarkDown4_roll_std_42', 'Store__Dept__features.csv__MarkDown4_roll_std_43', 'Store__Dept__features.csv__MarkDown4_roll_std_44', 'Store__Dept__features.csv__MarkDown4_roll_median_42', 'Store__Dept__features.csv__MarkDown4_roll_median_43', 'Store__Dept__features.csv__MarkDown4_roll_median_44', 'Store__Dept__features.csv__MarkDown4_roll_max_42', 'Store__Dept__features.csv__MarkDown4_roll_max_43', 'Store__Dept__features.csv__MarkDown4_roll_max_44', 'Store__Dept__features.csv__MarkDown4_roll_min_42', 'Store__Dept__features.csv__MarkDown4_roll_min_43', 'Store__Dept__features.csv__MarkDown4_roll_min_44', 'Store__Dept__features.csv__MarkDown5_roll_mean_42', 'Store__Dept__features.csv__MarkDown5_roll_mean_43', 'Store__Dept__features.csv__MarkDown5_roll_mean_44', 'Store__Dept__features.csv__MarkDown5_roll_std_42', 'Store__Dept__features.csv__MarkDown5_roll_std_43', 'Store__Dept__features.csv__MarkDown5_roll_std_44', 
'Store__Dept__features.csv__MarkDown5_roll_median_42', 'Store__Dept__features.csv__MarkDown5_roll_median_43', 'Store__Dept__features.csv__MarkDown5_roll_median_44', 'Store__Dept__features.csv__MarkDown5_roll_max_42', 'Store__Dept__features.csv__MarkDown5_roll_max_43', 'Store__Dept__features.csv__MarkDown5_roll_max_44', 'Store__Dept__features.csv__MarkDown5_roll_min_42', 'Store__Dept__features.csv__MarkDown5_roll_min_43', 'Store__Dept__features.csv__MarkDown5_roll_min_44', 'Store__Dept__features.csv__CPI_roll_mean_42', 'Store__Dept__features.csv__CPI_roll_mean_43', 'Store__Dept__features.csv__CPI_roll_mean_44', 'Store__Dept__features.csv__CPI_roll_std_42', 'Store__Dept__features.csv__CPI_roll_std_43', 'Store__Dept__features.csv__CPI_roll_std_44', 'Store__Dept__features.csv__CPI_roll_median_42', 'Store__Dept__features.csv__CPI_roll_median_43', 'Store__Dept__features.csv__CPI_roll_median_44', 'Store__Dept__features.csv__CPI_roll_max_42', 'Store__Dept__features.csv__CPI_roll_max_43', 'Store__Dept__features.csv__CPI_roll_max_44', 'Store__Dept__features.csv__CPI_roll_min_42', 'Store__Dept__features.csv__CPI_roll_min_43', 'Store__Dept__features.csv__CPI_roll_min_44', 'Store__Dept__features.csv__Unemployment_roll_mean_42', 'Store__Dept__features.csv__Unemployment_roll_mean_43', 'Store__Dept__features.csv__Unemployment_roll_mean_44', 'Store__Dept__features.csv__Unemployment_roll_std_42', 'Store__Dept__features.csv__Unemployment_roll_std_43', 'Store__Dept__features.csv__Unemployment_roll_std_44', 'Store__Dept__features.csv__Unemployment_roll_median_42', 'Store__Dept__features.csv__Unemployment_roll_median_43', 'Store__Dept__features.csv__Unemployment_roll_median_44', 'Store__Dept__features.csv__Unemployment_roll_max_42', 'Store__Dept__features.csv__Unemployment_roll_max_43', 'Store__Dept__features.csv__Unemployment_roll_max_44', 'Store__Dept__features.csv__Unemployment_roll_min_42', 'Store__Dept__features.csv__Unemployment_roll_min_43', 
'Store__Dept__features.csv__Unemployment_roll_min_44', 'Store__Dept__features.csv__IsHoliday_roll_mean_42', 'Store__Dept__features.csv__IsHoliday_roll_mean_43', 'Store__Dept__features.csv__IsHoliday_roll_mean_44', 'Store__Dept__features.csv__IsHoliday_roll_std_42', 'Store__Dept__features.csv__IsHoliday_roll_std_43', 'Store__Dept__features.csv__IsHoliday_roll_std_44', 'Store__Dept__features.csv__IsHoliday_roll_median_42', 'Store__Dept__features.csv__IsHoliday_roll_median_43', 'Store__Dept__features.csv__IsHoliday_roll_median_44', 'Store__Dept__features.csv__IsHoliday_roll_max_42', 'Store__Dept__features.csv__IsHoliday_roll_max_43', 'Store__Dept__features.csv__IsHoliday_roll_max_44', 'Store__Dept__features.csv__IsHoliday_roll_min_42', 'Store__Dept__features.csv__IsHoliday_roll_min_43', 'Store__Dept__features.csv__IsHoliday_roll_min_44', 'Store__Dept__Weekly_Sales__ewm_39', 'Store__Dept__Weekly_Sales__ewm_40', 'Store__Dept__Weekly_Sales__ewm_41', 'Store__Dept__Weekly_Sales__ewm_42', 'Store__Dept__IsHoliday__ewm_39', 'Store__Dept__IsHoliday__ewm_40', 'Store__Dept__IsHoliday__ewm_41', 'Store__Dept__IsHoliday__ewm_42', 'Store__Dept__stores.csv__Size__ewm_39', 'Store__Dept__stores.csv__Size__ewm_40', 'Store__Dept__stores.csv__Size__ewm_41', 'Store__Dept__stores.csv__Size__ewm_42', 'Store__Dept__features.csv__Temperature__ewm_39', 'Store__Dept__features.csv__Temperature__ewm_40', 'Store__Dept__features.csv__Temperature__ewm_41', 'Store__Dept__features.csv__Temperature__ewm_42', 'Store__Dept__features.csv__Fuel_Price__ewm_39', 'Store__Dept__features.csv__Fuel_Price__ewm_40', 'Store__Dept__features.csv__Fuel_Price__ewm_41', 'Store__Dept__features.csv__Fuel_Price__ewm_42', 'Store__Dept__features.csv__MarkDown1__ewm_39', 'Store__Dept__features.csv__MarkDown1__ewm_40', 'Store__Dept__features.csv__MarkDown1__ewm_41', 'Store__Dept__features.csv__MarkDown1__ewm_42', 'Store__Dept__features.csv__MarkDown2__ewm_39', 'Store__Dept__features.csv__MarkDown2__ewm_40', 
'Store__Dept__features.csv__MarkDown2__ewm_41', 'Store__Dept__features.csv__MarkDown2__ewm_42', 'Store__Dept__features.csv__MarkDown3__ewm_39', 'Store__Dept__features.csv__MarkDown3__ewm_40', 'Store__Dept__features.csv__MarkDown3__ewm_41', 'Store__Dept__features.csv__MarkDown3__ewm_42', 'Store__Dept__features.csv__MarkDown4__ewm_39', 'Store__Dept__features.csv__MarkDown4__ewm_40', 'Store__Dept__features.csv__MarkDown4__ewm_41', 'Store__Dept__features.csv__MarkDown4__ewm_42', 'Store__Dept__features.csv__MarkDown5__ewm_39', 'Store__Dept__features.csv__MarkDown5__ewm_40', 'Store__Dept__features.csv__MarkDown5__ewm_41', 'Store__Dept__features.csv__MarkDown5__ewm_42', 'Store__Dept__features.csv__CPI__ewm_39', 'Store__Dept__features.csv__CPI__ewm_40', 'Store__Dept__features.csv__CPI__ewm_41', 'Store__Dept__features.csv__CPI__ewm_42', 'Store__Dept__features.csv__Unemployment__ewm_39', 'Store__Dept__features.csv__Unemployment__ewm_40', 'Store__Dept__features.csv__Unemployment__ewm_41', 'Store__Dept__features.csv__Unemployment__ewm_42', 'Store__Dept__features.csv__IsHoliday__ewm_39', 'Store__Dept__features.csv__IsHoliday__ewm_40', 'Store__Dept__features.csv__IsHoliday__ewm_41', 'Store__Dept__features.csv__IsHoliday__ewm_42']\n",
      "   INFO ->  start training model\n",
      "   INFO ->  (421570, 322)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training with validation\n",
      "Training until validation scores don't improve for 150 rounds\n",
      "[100]\ttraining's rmse: 11486.8\tvalid_1's rmse: 9337.9\n",
      "[200]\ttraining's rmse: 8439.96\tvalid_1's rmse: 5748.24\n",
      "[300]\ttraining's rmse: 7537.04\tvalid_1's rmse: 5006.07\n",
      "[400]\ttraining's rmse: 7170.1\tvalid_1's rmse: 4824.32\n",
      "[500]\ttraining's rmse: 6979.62\tvalid_1's rmse: 4745.19\n",
      "[600]\ttraining's rmse: 6845.89\tvalid_1's rmse: 4696.79\n",
      "[700]\ttraining's rmse: 6760.18\tvalid_1's rmse: 4723.45\n",
      "Early stopping, best iteration is:\n",
      "[628]\ttraining's rmse: 6815.99\tvalid_1's rmse: 4688.85\n",
      "MSE: 21985298.147657212\n",
      "Finished in 0:00:20.895184\n",
      "ReTraining on all data\n",
      "[100]\ttraining's rmse: 11070.6\n",
      "[200]\ttraining's rmse: 7926.8\n",
      "[300]\ttraining's rmse: 7034.74\n",
      "[400]\ttraining's rmse: 6660.45\n",
      "[500]\ttraining's rmse: 6474.24\n",
      "[600]\ttraining's rmse: 6345.12\n",
      "[700]\ttraining's rmse: 6262.96\n",
      "Finished in 0:00:24.454681\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   INFO ->  (421570, 322)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training with validation\n",
      "[0]\tvalidation_0-rmse:26988.55273\n",
      "Will train until validation_0-rmse hasn't improved in 100 rounds.\n",
      "[100]\tvalidation_0-rmse:11106.44629\n",
      "[200]\tvalidation_0-rmse:6042.85547\n",
      "[300]\tvalidation_0-rmse:4652.65088\n",
      "[400]\tvalidation_0-rmse:4268.49365\n",
      "[500]\tvalidation_0-rmse:4145.81543\n",
      "[600]\tvalidation_0-rmse:4099.48242\n",
      "[700]\tvalidation_0-rmse:4076.93237\n",
      "[800]\tvalidation_0-rmse:4068.92334\n",
      "[900]\tvalidation_0-rmse:4064.46729\n",
      "[1000]\tvalidation_0-rmse:4060.95679\n",
      "[1100]\tvalidation_0-rmse:4057.40210\n",
      "[1200]\tvalidation_0-rmse:4056.21289\n",
      "[1300]\tvalidation_0-rmse:4054.61450\n",
      "[1400]\tvalidation_0-rmse:4053.05664\n",
      "[1500]\tvalidation_0-rmse:4051.39356\n",
      "[1600]\tvalidation_0-rmse:4050.07178\n",
      "[1700]\tvalidation_0-rmse:4048.80835\n",
      "[1800]\tvalidation_0-rmse:4047.33569\n",
      "[1900]\tvalidation_0-rmse:4046.69019\n",
      "[2000]\tvalidation_0-rmse:4045.33667\n",
      "[2100]\tvalidation_0-rmse:4044.17334\n",
      "[2200]\tvalidation_0-rmse:4043.46802\n",
      "[2300]\tvalidation_0-rmse:4041.12256\n",
      "[2400]\tvalidation_0-rmse:4040.35596\n",
      "[2500]\tvalidation_0-rmse:4039.27002\n",
      "[2600]\tvalidation_0-rmse:4038.24829\n",
      "[2700]\tvalidation_0-rmse:4037.33203\n",
      "[2800]\tvalidation_0-rmse:4037.06177\n",
      "[2900]\tvalidation_0-rmse:4037.31104\n",
      "Stopping. Best iteration:\n",
      "[2809]\tvalidation_0-rmse:4036.76269\n",
      "\n",
      "MSE: 16295463.0\n",
      "Finished in 0:29:32.598362\n",
      "ReTraining on all data\n",
      "[0]\tvalidation_0-rmse:27515.66016\n",
      "Will train until validation_0-rmse hasn't improved in 100 rounds.\n",
      "[100]\tvalidation_0-rmse:12054.64746\n",
      "[200]\tvalidation_0-rmse:7236.76562\n",
      "[300]\tvalidation_0-rmse:5814.93018\n",
      "[400]\tvalidation_0-rmse:5308.95410\n",
      "[500]\tvalidation_0-rmse:5072.85547\n",
      "[600]\tvalidation_0-rmse:4941.32324\n",
      "[700]\tvalidation_0-rmse:4860.37402\n",
      "[800]\tvalidation_0-rmse:4808.16650\n",
      "[900]\tvalidation_0-rmse:4772.63477\n",
      "[1000]\tvalidation_0-rmse:4742.34863\n",
      "[1100]\tvalidation_0-rmse:4715.46533\n",
      "[1200]\tvalidation_0-rmse:4691.59375\n",
      "[1300]\tvalidation_0-rmse:4668.03174\n",
      "[1400]\tvalidation_0-rmse:4645.43066\n",
      "[1500]\tvalidation_0-rmse:4624.38428\n",
      "[1600]\tvalidation_0-rmse:4605.25830\n",
      "[1700]\tvalidation_0-rmse:4586.43848\n",
      "[1800]\tvalidation_0-rmse:4569.41113\n",
      "[1900]\tvalidation_0-rmse:4552.44434\n",
      "[2000]\tvalidation_0-rmse:4533.68164\n",
      "[2100]\tvalidation_0-rmse:4517.34863\n",
      "[2200]\tvalidation_0-rmse:4502.64355\n",
      "[2300]\tvalidation_0-rmse:4487.02930\n",
      "[2400]\tvalidation_0-rmse:4471.52832\n",
      "[2500]\tvalidation_0-rmse:4457.94678\n",
      "[2600]\tvalidation_0-rmse:4443.53613\n",
      "[2700]\tvalidation_0-rmse:4429.92969\n",
      "[2800]\tvalidation_0-rmse:4416.11572\n",
      "[2900]\tvalidation_0-rmse:4404.19141\n",
      "[2999]\tvalidation_0-rmse:4391.83350\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "   INFO ->  feature importance\n",
      "   INFO ->                                               feature  feature_importance\n",
      "0                                          IsHoliday                 181\n",
      "1                                   stores.csv__Type                  92\n",
      "2                                   stores.csv__Size                 403\n",
      "3                          features.csv__Temperature                 196\n",
      "4                           features.csv__Fuel_Price                  47\n",
      "..                                               ...                 ...\n",
      "317  Store__Dept__features.csv__Unemployment__ewm_42                   8\n",
      "318     Store__Dept__features.csv__IsHoliday__ewm_39                   3\n",
      "319     Store__Dept__features.csv__IsHoliday__ewm_40                  13\n",
      "320     Store__Dept__features.csv__IsHoliday__ewm_41                  63\n",
      "321     Store__Dept__features.csv__IsHoliday__ewm_42                  10\n",
      "\n",
      "[322 rows x 2 columns]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Finished in 0:51:00.593180\n"
     ]
    }
   ],
   "source": [
    "sub = autox.get_submit_ts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "576abf46",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T04:55:20.111524Z",
     "start_time": "2021-11-15T04:55:19.992478Z"
    }
   },
   "outputs": [],
   "source": [
    "# Attach the ids from the sample submission to the predictions and keep only\n",
    "# the `Id` and `Weekly_Sales` columns.\n",
    "submit = pd.read_csv('../data/walmart_recruiting/sampleSubmission.csv')\n",
    "\n",
    "# The ids are matched to predictions by index, so a row-count mismatch would\n",
    "# silently produce a broken submission; fail loudly instead.\n",
    "assert len(sub) == len(submit), 'prediction / sampleSubmission row count mismatch'\n",
    "\n",
    "# `assign` returns a new frame rather than writing into the existing one,\n",
    "# so the frame `sub` pointed to before this cell is left unmodified.\n",
    "sub = sub.assign(Id=submit['Id'])[['Id', 'Weekly_Sales']]\n",
    "\n",
    "# Index-aligned assignment leaves NaN wherever the two indexes disagree.\n",
    "assert sub['Id'].notna().all(), 'index misalignment left some Ids empty'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5e16a1e0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T04:55:21.560929Z",
     "start_time": "2021-11-15T04:55:21.535975Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>Weekly_Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1_1_2012-11-02</td>\n",
       "      <td>25213.679511</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1_1_2012-11-09</td>\n",
       "      <td>25506.688117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1_1_2012-11-16</td>\n",
       "      <td>22432.246294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1_1_2012-11-23</td>\n",
       "      <td>39339.997853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1_1_2012-11-30</td>\n",
       "      <td>33034.963365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115059</th>\n",
       "      <td>45_98_2013-06-28</td>\n",
       "      <td>681.154183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115060</th>\n",
       "      <td>45_98_2013-07-05</td>\n",
       "      <td>818.599082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115061</th>\n",
       "      <td>45_98_2013-07-12</td>\n",
       "      <td>736.459453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115062</th>\n",
       "      <td>45_98_2013-07-19</td>\n",
       "      <td>768.392045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115063</th>\n",
       "      <td>45_98_2013-07-26</td>\n",
       "      <td>715.306885</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>115064 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      Id  Weekly_Sales\n",
       "0         1_1_2012-11-02  25213.679511\n",
       "1         1_1_2012-11-09  25506.688117\n",
       "2         1_1_2012-11-16  22432.246294\n",
       "3         1_1_2012-11-23  39339.997853\n",
       "4         1_1_2012-11-30  33034.963365\n",
       "...                  ...           ...\n",
       "115059  45_98_2013-06-28    681.154183\n",
       "115060  45_98_2013-07-05    818.599082\n",
       "115061  45_98_2013-07-12    736.459453\n",
       "115062  45_98_2013-07-19    768.392045\n",
       "115063  45_98_2013-07-26    715.306885\n",
       "\n",
       "[115064 rows x 2 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e8f36152",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-11-15T04:55:24.482727Z",
     "start_time": "2021-11-15T04:55:24.115294Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# `to_csv` does not create missing parent directories; make sure the output\n",
    "# folder exists so a long training run is not lost to an OSError here.\n",
    "submission_path = \"./sub/autox_1114_walmart_recruiting_oneclick.csv\"\n",
    "os.makedirs(os.path.dirname(submission_path), exist_ok=True)\n",
    "\n",
    "# index=False: the submission file carries only the columns of `sub`.\n",
    "sub.to_csv(submission_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3d537a6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
